1 Initial note:

Figure 4A is derived from Dataset 1 (n = 4531). Figures 4B, 4C, 5A, and 5C are based on Dataset 2 (n = 3593 - after removing any sample with extra entries in the intial dataset), while Figure 5B is generated from Dataset 3 (n = 1332 - after removing any sample with extra entries in the intial dataset). Hairtime (pDLMO) distribution is consistent across all datasets. However, we re-plotted it to maintain a consistent structure across markdown sections and display the distribution of the sample from which each figure originates.

2 Figure 4A

2.1 Distribution - linear

rm(list = ls(all = TRUE))
# I could not load the data from the Rda file, so I loaded the data from db_all.Rda
load("06_FinalDataset/ht1.Rda")

suppressMessages(library(ggplot2))
suppressMessages(library(tidyverse))


# hair time - h from midnight (circ plot option commented out in the end of script)
ht1$hair_time <- ifelse(ht1$hair_time > 13, ht1$hair_time -24, ht1$hair_time)

# plot 

mean = mean(ht1$hair_time)
median = median(ht1$hair_time)
print(paste0("Mean of hairtime: ", mean(ht1$hair_time)))
## [1] "Mean of hairtime: -3.37115222554202"
print(paste0("Median of hairtime: ", median(ht1$hair_time)))
## [1] "Median of hairtime: -3.51666666666667"
distr <- 
  ggplot() + 
  geom_histogram(data = ht1, aes(x = hair_time),
                 fill = "grey", color = "black", 
                 breaks = seq(-12,12,.5),
                 alpha = .5, linewidth = .1) +
  scale_y_continuous(name = "Distribution (counts)", breaks = seq(0, 900, 150)) +
  scale_x_continuous(limits = c(-12,12),
                     breaks = seq(-12,12,2),
                     labels = c("12:00","14:00", "16:00",
                                "18:00","20:00", "22:00",
                                "00:00",
                                "02:00", "04:00", "06:00",
                                "08:00", "10:00", "12:00"),
                     name = "Predicted DLMO (hours)") +
  geom_vline(xintercept = mean, color = "red", linetype = "dashed") +
  geom_vline(xintercept = median, color = "blue", linetype = "dashed") +
  # add annotation for vlines
  annotate("text", x = mean+2, y = 800, label = "Mean", color = "red", size = 4) +
  annotate("text", x = median-2, y = 800, label = "Median", color = "blue", size = 4) +
  theme_bw()+
  theme(plot.background = element_rect(fill = "transparent", colour = "transparent"),
        legend.position = "none",
        plot.margin = margin(.8, .8, .8, .6, "cm"),
        panel.border = element_rect(color = "black", fill = NA, linewidth = 1),
        axis.line =  element_line(),
        panel.grid.minor.y = element_blank(),
        panel.grid = element_line(linewidth = .1),
        axis.text = element_text(size = 12),
        axis.text.x = element_text(angle = 90, vjust = 0.5),
        axis.title = element_text(size = 12, face = "bold"),
        axis.title.x = element_text(vjust = -3),
        axis.title.y = element_text(vjust = 3))
distr

2.2 Distribution - Circular

library(circular)
## 
## Attaching package: 'circular'
## The following objects are masked from 'package:stats':
## 
##     sd, var
library(ggplot2)

# Convert hair_time from hours to radians (for circular calculation)
ht_circular <- circular(ht1$hair_time * (2 * pi / 24), units = "radians", template = "clock24")

# Calculate circular mean and median
circular_mean_rad <- mean.circular(ht_circular)
circular_median_rad <- median.circular(ht_circular)

# Convert circular mean and median back to hours
circular_mean_hour <- as.numeric(circular_mean_rad) * (24 / (2 * pi))
circular_median_hour <- as.numeric(circular_median_rad) * (24 / (2 * pi))

# Adjust the plot code to include circular mean and median
distr <- 
  ggplot() + 
  geom_histogram(data = ht1, aes(x = hair_time),
                 fill = "#66a1ff", color = "black", 
                 breaks = seq(-12, 12, 0.5),
                 alpha = 0.5, linewidth = 0.1) +
  geom_vline(xintercept = circular_mean_hour, color = "red", linetype = "dashed") +
  geom_vline(xintercept = circular_median_hour, color = "blue", linetype = "dashed") +
  annotate("text", x = circular_mean_hour + 0.5, y = max(ht1$hair_time), 
           label = "Circular Mean", color = "red", size = 3, vjust = 5, hjust = 0) +
  annotate("text", x = circular_median_hour - 0.5, y = max(ht1$hair_time), 
           label = "Circular Median", color = "blue", size = 3, vjust = 6, hjust = -0.1) +
  scale_y_continuous(name = "Distribution (counts)", breaks = seq(0, 900, 150)) +
  scale_x_continuous(limits = c(-12, 12),
                     breaks = seq(-12, 12, 2),
                     labels = c("12:00", "14:00", "16:00",
                                "18:00", "20:00", "22:00",
                                "24:00", "02:00", "04:00", "06:00",
                                "08:00", "10:00", "")) +
  coord_polar(start = pi) +  # Use coord_polar instead of coord_radial
  theme_bw() +
  theme(plot.background = element_rect(fill = "transparent", colour = "transparent"),
        legend.position = "none",
        plot.margin = margin(0.8, 0.8, 0.8, 0.6, "cm"),
        panel.border = element_rect(color = "black", fill = NA, linewidth = 0.4),
        axis.line = element_line(linewidth = 0.4),
        panel.grid.minor.y = element_blank(),
        panel.grid = element_line(linewidth = 0.1),
        axis.text = element_text(size = 12),
        axis.text.x = element_text(vjust = 0.5),
        axis.title = element_text(size = 12, face = "bold"),
        axis.title.x = element_text(vjust = -3),
        axis.title.y = element_text(vjust = 3))

distr

3 Figure 4B

3.1 Distribution

rm(list = ls(all = TRUE)) 
load("06_FinalDataset/ht2.Rda")

dup <- read.csv("06_FinalDataset/db_duplicate.csv") # 3949
colnames(dup) <- c("index", "duplicates")

# filter ht2 if ht2$id_s is in the dup$duplicates
ht2 <- ht2[!(ht2$id_s %in% dup$duplicates), ] # 3593
3949 - 3593
## [1] 356
library(ggplot2)
library(tidyverse)
library(dplyr)

# hair time around midnight
ht2$hair_time <- ifelse(ht2$hair_time > 13, ht2$hair_time -24, ht2$hair_time)

ht2$age_cat_label <- cut(ht2$age, breaks = seq(17,91,3), labels = seq(19, 90, 3))
ht2$age_cat_label <- as.numeric(as.character(ht2$age_cat_label))
ht2$age_cat_label[ht2$age_cat_label == 70] <- 68
ht2$age_cat_label[ht2$age_cat_label == 67] <- 68


ht2$age_cat <- cut(ht2$age, seq(17,71, 3))
ht2$age_cat <- as.character(ht2$age_cat)
ht2$age_cat[ht2$age_cat == "(68,71]"] <- "(65,70]"
ht2$age_cat[ht2$age_cat == "(65,68]"] <- "(65,70]"

# get dots 
means <- ht2 %>% group_by(age_cat_label) %>% summarise(mean_age = mean(age),
                                                       se_hair_time = plotrix::std.error(hair_time),
                                                       mean_hair_time = mean(hair_time),
                                                       n = n())

means$x <- as.numeric(as.character(means$age_cat_label))
means <- means %>% rename(hair_time = mean_hair_time)


# first linear histogram to show distribution used to model (ht2)
mean = mean(ht2$hair_time)
median = median(ht2$hair_time)

print(paste0("Mean of hairtime: ", mean(ht2$hair_time)))
## [1] "Mean of hairtime: -3.35257445032007"
print(paste0("Median of hairtime: ", median(ht2$hair_time)))
## [1] "Median of hairtime: -3.5"
distr2 <- 
  ggplot(ht2) +
  geom_histogram(data = ht2, aes(x = hair_time),
                 fill = "grey", color = "black", 
                 breaks = seq(-12,12,.5),
                 alpha = .5, linewidth = .1) +
  theme_bw() +
  scale_y_continuous(name = "Distribution (counts)", 
                     breaks = seq(0, 500, 100)) +
  scale_x_continuous(limits = c(-12,12),
                     breaks = seq(-12,12,2),
                     labels = c("12:00","14:00", "16:00",
                                "18:00","20:00", "22:00",
                                "00:00",
                                "02:00", "04:00", "06:00",
                                "08:00", "10:00", "12:00"),
                     name = "Predicted DLMO (hours)") +
  geom_vline(xintercept = mean, color = "red", linetype = "dashed") +
  geom_vline(xintercept = median, color = "blue", linetype = "dashed") +
  # add annotation for vlines
  annotate("text", x = mean+1, y = 800, label = "Mean", color = "red", size = 3) +
  annotate("text", x = median-1, y = 800, label = "Median", color = "blue", size = 3) +
  theme_bw()+
  theme(plot.background = element_rect(fill = "transparent", colour = "transparent"),
        legend.position = "none",
        plot.margin = margin(.8, .8, .8, .6, "cm"),
        panel.border = element_rect(color = "black", fill = NA, linewidth = 1),  # Add this line
        axis.line =  element_blank(),
        panel.grid.minor.y = element_blank(),
        panel.grid = element_line(linewidth = .1),
        axis.text = element_text(size = 12),
        axis.text.x = element_text(angle = 90, vjust = 0.5),
        axis.title = element_text(size = 12, face = "bold"),
        axis.title.x = element_text(vjust = -3),
        axis.title.y = element_text(vjust = 3)) 
distr2

3.2 Tests

suppressMessages(library(circular))
suppressMessages(library(CircStats))
suppressMessages(library(dplyr))
suppressMessages(library(Directional))

# Perform correlation test
corr <- cor.test(log(ht2$age), ht2$hair_time)
corr
## 
##  Pearson's product-moment correlation
## 
## data:  log(ht2$age) and ht2$hair_time
## t = -13.587, df = 3591, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2520043 -0.1897992
## sample estimates:
##        cor 
## -0.2211266
# Perform t-test
median(ht2$age)
## [1] 37
ht2$age_group <- ifelse(ht2$age < median(ht2$age), "young", "old")
t.test(ht2$hair_time ~ ht2$age_group)
## 
##  Welch Two Sample t-test
## 
## data:  ht2$hair_time by ht2$age_group
## t = -12.043, df = 3538, p-value < 2.2e-16
## alternative hypothesis: true difference in means between group old and group young is not equal to 0
## 95 percent confidence interval:
##  -0.7258310 -0.5225877
## sample estimates:
##   mean in group old mean in group young 
##           -3.657296           -3.033086
suppressMessages(library(circular))
suppressMessages(library(CircStats))
suppressMessages(library(dplyr))
suppressMessages(library(Directional))

# Define younger and older groups based on median age
median_age <- median(ht2$age, na.rm = TRUE)
ht2$age_group <- ifelse(ht2$age < median_age, "young", "old")

# Extract numeric vectors for young and old
young <- ht2 %>% filter(age_group == "young") %>% pull(hair_time)
old <- ht2 %>% filter(age_group == "old") %>% pull(hair_time)
all <- ht2  %>% pull(hair_time)

# Convert hours to radians (since 24 hours = 2Ï€ radians)
young_rad <- young * (2 * pi / 24)
old_rad <- old * (2 * pi / 24)
all_rad <- all * (2 * pi / 24)

# Ensure all values are within [0, 2Ï€]
young_rad <- (young_rad + 2 * pi) %% (2 * pi)
old_rad <- (old_rad + 2 * pi) %% (2 * pi)
all_rad <- (all_rad + 2 * pi) %% (2 * pi)

# Convert to circular data
young_circ <- circular(young_rad, units = "radians")
old_circ <- circular(old_rad, units = "radians")
all_circ <- circular(all_rad, units = "radians")

# 1. Linear circular correlation
circlin.cor(theta = all_circ, x = ht2$age, rads = TRUE)
##       R-squared      p-value
## [1,] 0.04363605 8.376383e-69
# 1. Perform Watson-Williams test for homogeneity of means
ww_test <- watson.williams.test(list(young_circ, old_circ))
## Warning in watson.williams.test.default(x, group): Concentration parameters
## (6.27, 7.24) not equal between groups. The test might not be applicable
print(ww_test)
## 
##  Watson-Williams test for homogeneity of means
## 
## data:  1 and 2
## F = 146.78, df1 = 1, df2 = 3591, p-value < 2.2e-16
## sample estimates:
## Circular Data: 
## Type = angles 
## Units = radians 
## Template = none 
## Modulo = asis 
## Zero = 0 
## Rotation = counter 
##  mean of 1  mean of 2 
## -0.8009976 -0.9623371
# 2. If concentration parameters differ, perform Rao's Spacing Test (uniformity)
rao_test <- rao.spacing.test(c(young_circ, old_circ))
print(rao_test)
## 
##        Rao's Spacing Test of Uniformity 
##  
## Test Statistic = 310.103 
## P-value < 0.001 
## 
# 3. Non-parametric alternative (circular)
watson_wheeler_test <- watson.wheeler.test(list(young_circ, old_circ))
## Warning in watson.wheeler.test.default(x, group): There are 3095 ties in the data.
##   Ties will be broken appart randomly and may influence the result.
##   Re-run the test several times to check the influence of ties.
print(watson_wheeler_test)
## 
##  Watson-Wheeler test for homogeneity of angles
## 
## data:  1 and 2
## W = 87.885, df = 2, p-value < 2.2e-16
# 4. Convert circular mean differences back to hours for interpretability
young_mean <- mean.circular(young_circ)
old_mean <- mean.circular(old_circ)

# Convert circular means from radians to hours
young_time <- (young_mean * 12 / pi) + 24
old_time <- (old_mean * 12 / pi) + 24

# Calculate the difference in minutes
time_diff_minutes <- (young_time - old_time) * 60
print(paste("Time difference between young and old groups (minutes):", round(time_diff_minutes, 2)))
## [1] "Time difference between young and old groups (minutes): 36.98"

3.3 Model

library(splines)
m2 <- lm(hair_time ~  sex + age, data = ht2)
m1 <- lm(hair_time ~  sex + bs(age), data = ht2)
jtools::summ(m1, digits = 3, confint = T)
Observations 3593
Dependent variable hair_time
Type OLS linear regression
F(4,3588) 56.286
R² 0.059
Adj. R² 0.058
Est. 2.5% 97.5% t val. p
(Intercept) -2.258 -2.491 -2.025 -19.020 0.000
sexF -0.087 -0.191 0.017 -1.634 0.102
bs(age)1 -1.509 -2.150 -0.867 -4.612 0.000
bs(age)2 -1.801 -2.228 -1.374 -8.266 0.000
bs(age)3 -1.055 -1.502 -0.608 -4.626 0.000
Standard errors: OLS

3.4 Checking assumptions

suppressMessages(library(performance))
check_model(m1)

3.4.1 BIC: linear vs. spline model

print(paste0("BIC for spline model: ", BIC(m1)))
## [1] "BIC for spline model: 13318.0384523849"
print(paste0("BIC for linear model: ", BIC(m2)))
## [1] "BIC for linear model: 13364.1917039724"

3.5 Plots

# get lines ----------------------------------------------------

eff_age <- ggeffects::predict_response(m1, margin = "marginalmeans", terms = "age")
eff_sex <- ggeffects::predict_response(m1, margin = "marginalmeans", terms = "sex")
eff <- ggeffects::predict_response(m1, margin = "marginalmeans", terms = c("age", "sex"))
eff <- eff %>% rename(hair_time = predicted)
eff_age <- eff_age %>% rename(hair_time = predicted)


# get dots ----------------------------------------------------

ht2$age_cat_label <- cut(ht2$age, breaks = seq(17,91,3),
                         labels = seq(19, 90, 3))
ht2$age_cat_label <- as.numeric(as.character(ht2$age_cat_label))
ht2$age_cat_label[ht2$age_cat_label == 70] <- 68
ht2$age_cat_label[ht2$age_cat_label == 67] <- 68


ht2$age_cat <- cut(ht2$age, seq(17,71, 3))

ht2$age_cat <- as.character(ht2$age_cat)
ht2$age_cat[ht2$age_cat == "(68,71]"] <- "(65,70]"
ht2$age_cat[ht2$age_cat == "(65,68]"] <- "(65,70]"

# get dots 
means <- ht2 %>% group_by(age_cat_label) %>% summarise(mean_age = mean(age),
                                                        se_hair_time = plotrix::std.error(hair_time),
                                                        mean_hair_time = mean(hair_time),
                                                        n = n())

means$x <- as.numeric(as.character(means$age_cat_label))
means <- means %>% rename(hair_time = mean_hair_time)

as_distr <- 
  ggplot() +
  geom_point(data = means, aes(x = x, 
                               y = hair_time, 
                               size = n), 
             color = "grey75") +
  geom_errorbar(data = means, aes(x = x, 
                                  ymin = hair_time-se_hair_time,
                                  ymax = hair_time+se_hair_time),
                linewidth = .3, width = .4, color = "grey75") + 
  geom_line(data = eff_age, aes(x = x, y = hair_time)) +
  geom_ribbon(data = eff_age, aes(ymin = conf.low, 
                                  ymax = conf.high,
                                  x = x), alpha = .2) +
  theme_bw() +
  scale_x_continuous(name = "Age", breaks = seq(20, 80, 10)) +
  scale_y_continuous("Predicted DLMO (hours)",
                     breaks = c(-4,-3.5,-3, -2.5, -2),
                     labels = c("20:00", "20:30",
                                "21:00", "21:30", "22:00"),
                     limits = c(-4, -2)) +
  theme(plot.background = element_rect(fill = "transparent", colour = "transparent"),
        plot.margin = margin(.8, .8, .8, .6, "cm"),
        panel.border = element_rect(color = "black", fill = NA, linewidth = 1),
        axis.line =  element_line(),
        panel.grid.minor.y = element_blank(),
        panel.grid = element_line(linewidth = .1),
        axis.text = element_text(size = 10),
        axis.title = element_text(size = 12, face = "bold"),
        axis.title.x = element_text(vjust = -2),
        axis.title.y = element_text(vjust = 3),
        legend.position = c(1, 1),
        legend.justification = c(1, 1)) +
  scale_size(name = "Bin sample size", 
             limits = c(80, 400), range = c(1,5),
             breaks = c(100, 200, 300, 400)) 

as_distr

4 Figure 4C

4.1 Distribution

rm(list = ls(all = TRUE)) 
load("06_FinalDataset/ht2.Rda")

dup <- read.csv("06_FinalDataset/db_duplicate.csv") # 3949
colnames(dup) <- c("index", "duplicates")

# filter ht2 if ht2$id_s is in the dup$duplicates
ht2 <- ht2[!(ht2$id_s %in% dup$duplicates), ] # 3593
3949 - 3593
## [1] 356
library(ggplot2)
library(tidyverse)
library(dplyr)

# hair time around midnight
ht2$hair_time <- ifelse(ht2$hair_time > 13, ht2$hair_time -24, ht2$hair_time)

ht2$age_cat_label <- cut(ht2$age, breaks = seq(17,91,3), labels = seq(19, 90, 3))
ht2$age_cat_label <- as.numeric(as.character(ht2$age_cat_label))
ht2$age_cat_label[ht2$age_cat_label == 70] <- 68
ht2$age_cat_label[ht2$age_cat_label == 67] <- 68


ht2$age_cat <- cut(ht2$age, seq(17,71, 3))
ht2$age_cat <- as.character(ht2$age_cat)
ht2$age_cat[ht2$age_cat == "(68,71]"] <- "(65,70]"
ht2$age_cat[ht2$age_cat == "(65,68]"] <- "(65,70]"

# get dots 
means <- ht2 %>% group_by(age_cat_label) %>% summarise(mean_age = mean(age),
                                                       se_hair_time = plotrix::std.error(hair_time),
                                                       mean_hair_time = mean(hair_time),
                                                       n = n())

means$x <- as.numeric(as.character(means$age_cat_label))
means <- means %>% rename(hair_time = mean_hair_time)

print(paste0("Mean of hairtime: ", mean(ht2$hair_time)))
## [1] "Mean of hairtime: -3.35257445032007"
print(paste0("Median of hairtime: ", median(ht2$hair_time)))
## [1] "Median of hairtime: -3.5"
# first linear histogram to show distribution used to model (ht2)
mean = mean(ht2$hair_time)
median = median(ht2$hair_time)
distr2 <- 
  ggplot(ht2) +
  geom_histogram(data = ht2, aes(x = hair_time),
                 fill = "grey", color = "black", 
                 breaks = seq(-12,12,.5),
                 alpha = .5, linewidth = .1) +
  theme_bw() +
  scale_y_continuous(name = "Distribution (counts)", 
                     breaks = seq(0, 500, 100)) +
  scale_x_continuous(limits = c(-12,12),
                     breaks = seq(-12,12,2),
                     labels = c("12:00","14:00", "16:00",
                                "18:00","20:00", "22:00",
                                "00:00",
                                "02:00", "04:00", "06:00",
                                "08:00", "10:00", "12:00"),
                     name = "Predicted DLMO (hours)") +
  geom_vline(xintercept = mean, color = "red", linetype = "dashed") +
  geom_vline(xintercept = median, color = "blue", linetype = "dashed") +
  # add annotation for vlines
  annotate("text", x = mean+1, y = 800, label = "Mean", color = "red", size = 3) +
  annotate("text", x = median-1, y = 800, label = "Median", color = "blue", size = 3) +
  theme_bw()+
  theme(plot.background = element_rect(fill = "transparent", colour = "transparent"),
        legend.position = "none",
        plot.margin = margin(.8, .8, .8, .6, "cm"),
        panel.border = element_rect(color = "black", fill = NA, linewidth = 1),  # Add this line
        axis.line =  element_blank(),
        panel.grid.minor.y = element_blank(),
        panel.grid = element_line(linewidth = .1),
        axis.text = element_text(size = 12),
        axis.text.x = element_text(angle = 90, vjust = 0.5),
        axis.title = element_text(size = 12, face = "bold"),
        axis.title.x = element_text(vjust = -3),
        axis.title.y = element_text(vjust = 3)) 
distr2

4.2 Tests

# Extract numeric vectors for M and F
m <- ht2 %>% filter(sex == "M") %>% pull(hair_time)
f <- ht2 %>% filter(sex == "F") %>% pull(hair_time)

# Perform t-test 
print(t.test(m, f))
## 
##  Welch Two Sample t-test
## 
## data:  m and f
## t = 4.9799, df = 3576.9, p-value = 6.662e-07
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.1585422 0.3644475
## sample estimates:
## mean of x mean of y 
## -3.229360 -3.490854
# Convert hours to radians (since 24 hours = 2Ï€ radians)
m_rad <- m * (2 * pi / 24)
f_rad <- f * (2 * pi / 24)
 
# Ensure all values are within [0, 2Ï€]
m_rad <- (m_rad + 2 * pi) %% (2 * pi)
f_rad <- (f_rad + 2 * pi) %% (2 * pi)
 
# Convert to circular data
m_circ <- circular(m_rad, units = "radians")
f_circ <- circular(f_rad, units = "radians")
 
# Perform Watson-Williams test
watson.williams.test(list(m_circ, f_circ))
## 
##  Watson-Williams test for homogeneity of means
## 
## data:  1 and 2
## F = 22.515, df1 = 1, df2 = 3591, p-value = 2.167e-06
## sample estimates:
## Circular Data: 
## Type = angles 
## Units = radians 
## Template = none 
## Modulo = asis 
## Zero = 0 
## Rotation = counter 
##  mean of 1  mean of 2 
## -0.8536555 -0.9180012
print((-0.8551704 * 12 / 3.14159265359) + 24)
## [1] 20.73349
print((-0.9232520 * 12 / 3.14159265359) + 24)
## [1] 20.47344
print((((-0.8551704 * 12 / 3.14159265359) + 24)-((-0.9232520 * 12 / 3.14159265359) + 24)) * 60)
## [1] 15.60315

4.3 Model

# Build models
m1 <- lm(hair_time ~ sex + bs(age), data = ht2)
jtools::summ(m1, digits = 3, confint = T)
Observations 3593
Dependent variable hair_time
Type OLS linear regression
F(4,3588) 56.286
R² 0.059
Adj. R² 0.058
Est. 2.5% 97.5% t val. p
(Intercept) -2.258 -2.491 -2.025 -19.020 0.000
sexF -0.087 -0.191 0.017 -1.634 0.102
bs(age)1 -1.509 -2.150 -0.867 -4.612 0.000
bs(age)2 -1.801 -2.228 -1.374 -8.266 0.000
bs(age)3 -1.055 -1.502 -0.608 -4.626 0.000
Standard errors: OLS

4.3.1 Converting Est. to minutes

convert_to_minutes <- function(coef_value) {
  minutes <- coef_value * 60  # Convert hours to minutes
  seconds <- (minutes - floor(minutes)) * 60  # Extract remaining seconds
  return(sprintf("%d min %d sec", floor(minutes), round(seconds)))
}

coefficients <- c(
  sexF = -0.108
)
# Apply the conversion function
converted_times <- sapply(coefficients, convert_to_minutes)
# Print results
print(converted_times)
##            sexF 
## "-7 min 31 sec"

4.4 Checking assumptions

suppressMessages(library(performance))
check_model(m1)

4.5 Plot

library(emmeans)
## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'
library(ggpubr)
# Compute emmeans for pairwise comparisons
emmeans_results <- emmeans(m1, ~ sex)
pairwise_comparison <- contrast(emmeans_results, method = "pairwise")

# Extract p-value
p_value_sex <- summary(pairwise_comparison)$p.value

# Compute t-test for hair_time between sexes
t_test_result <- t.test(ht2$hair_time ~ ht2$sex, var.equal = TRUE)
p_value_violin <- t_test_result$p.value

# Get effect estimates for sex
eff_sex <- ggeffects::predict_response(m1, margin = "marginalmeans", terms = "sex")

# Create mod_sex plot
mod_sex <- ggplot() +
  geom_point(data = eff_sex, aes(x = x, y = predicted, color = x), size = 3) +
  geom_errorbar(data = eff_sex, aes(x = x, ymin = conf.low, ymax = conf.high, color = x), 
                linewidth = .4, width = .05) + 
  theme_bw() +
  xlab("Sex") +
  scale_y_continuous("Predicted DLMO (hours)",
                     breaks = c(-4.00, -3.75, -3.5, -3.25, -3),
                     labels = c("19:00", "20:15", "20:30", "20:45", "21:00"),
                     limits = c(-3.8, -3.2)) +  
  theme(plot.background = element_rect(fill = "transparent", colour = "transparent"),
        plot.margin = margin(.8, .8, .8, .6, "cm"),
        panel.border = element_rect(color = "black", fill = NA, linewidth = 1),
        legend.position = "none",
        axis.line = element_line(),
        panel.grid.minor.y = element_blank(),
        panel.grid = element_line(linewidth = .1),
        axis.text = element_text(size = 10),
        axis.title = element_text(size = 12, face = "bold"),
        axis.title.x = element_text(vjust = -2),
        axis.title.y = element_text(vjust = 3)) +
  scale_color_manual(values = c("#6fa8dc", "#d5a6bd"), name = "Working") +
  scale_fill_manual(values = c("#6fa8dc", "#d5a6bd"), name = "Working") +
  stat_pvalue_manual(
    data = data.frame(x = "F", y = 4.5, group1 = "M", group2 = "F", 
                      label = sprintf("p = %.3f", p_value_sex)),
    y.position = -3.3,
    size = 5
  )

mod_sex

4.6 Plot

# Create violin plot
violin <- ggplot(ht2, aes(x = sex, y = hair_time)) + 
  geom_violin(aes(color = sex, fill = sex)) +
  geom_boxplot(color = "black", alpha = .5, width = .5) +
  theme(legend.position = "none") +
  scale_color_manual(values = c("#6fa8dc", "#d5a6bd")) +
  scale_fill_manual(values = c("#6fa8dc", "#d5a6bd")) +
  xlab("Sex") +
  scale_y_continuous("Predicted DLMO (hours)",
                     breaks = c(-10, -7.5, -5, -2.5, 0, 2.5, 5.0),
                     labels = c("14:00", "16:30", "19:00", "21:30", "00:00", "02:30", "05:00"))  +
  theme_bw() +
  theme(plot.background = element_rect(fill = "transparent", colour = "transparent"),
        plot.margin = margin(.8, .8, .8, .6, "cm"),
        panel.border = element_rect(color = "black", fill = NA, linewidth = 1),
        legend.position = "none",
        axis.line = element_line(),
        panel.grid.minor.y = element_blank(),
        panel.grid = element_line(linewidth = .1),
        axis.text = element_text(size = 10),
        axis.title = element_text(size = 12, face = "bold"),
        axis.title.x = element_text(vjust = -2),
        axis.title.y = element_text(vjust = 3)) +
  stat_pvalue_manual(
    data = data.frame(group1 = "M", group2 = "F", 
                      label = sprintf("p = %.3f", p_value_violin)),
    y.position = 5,
    size = 4
  )

violin

5 Figure 5A

5.1 Distribution

# Clear environment
rm(list = ls(all = TRUE)) 

# Load dataset
load("06_FinalDataset/ht2.Rda")


dup <- read.csv("06_FinalDataset/db_duplicate.csv") # 3949
colnames(dup) <- c("index", "duplicates")

# filter ht2 if ht2$id_s is in the dup$duplicates
ht2 <- ht2[!(ht2$id_s %in% dup$duplicates), ] # 3593
3949 - 3593
## [1] 356
# number of NA in ht2$employed
sum(is.na(ht2$employed))
## [1] 0
ht2 <- ht2 %>% filter(!is.na(ht2$employed))

# Load required libraries
library(ggplot2)
library(tidyverse)
library(dplyr)
library(splines)
library(ggpubr)
library(emmeans)

# Adjust hair_time
ht2$hair_time <- ifelse(ht2$hair_time > 13, ht2$hair_time - 24, ht2$hair_time)



# linear histogram to show distribution used to model
# first linear histogram to show distribution used to model (ht2)
mean = mean(ht2$hair_time)
median = median(ht2$hair_time)

print(paste0("Mean of hairtime: ", mean(ht2$hair_time)))
## [1] "Mean of hairtime: -3.35257445032007"
print(paste0("Median of hairtime: ", median(ht2$hair_time)))
## [1] "Median of hairtime: -3.5"
distr2 <- 
  ggplot(ht2) +
  geom_histogram(data = ht2, aes(x = hair_time),
                 fill = "grey", color = "black", 
                 breaks = seq(-12,12,.5),
                 alpha = .5, linewidth = .1) +
  theme_bw() +
  scale_y_continuous(name = "Distribution (counts)", 
                     breaks = seq(0, 500, 100)) +
  scale_x_continuous(limits = c(-12,12),
                     breaks = seq(-12,12,2),
                     labels = c("12:00","14:00", "16:00",
                                "18:00","20:00", "22:00",
                                "00:00",
                                "02:00", "04:00", "06:00",
                                "08:00", "10:00", "12:00"),
                     name = "Hair Time (h)") +
  geom_vline(xintercept = mean, color = "red", linetype = "dashed") +
  geom_vline(xintercept = median, color = "blue", linetype = "dashed") +
  # add annotation for vlines
  annotate("text", x = mean+1, y = 800, label = "Mean", color = "red", size = 3) +
  annotate("text", x = median-1, y = 800, label = "Median", color = "blue", size = 3) +
  theme_bw()+
  theme(plot.background = element_rect(fill = "transparent", colour = "transparent"),
        legend.position = "none",
        plot.margin = margin(.8, .8, .8, .6, "cm"),
        panel.border = element_rect(color = "black", fill = NA, linewidth = 1),  # Add this line
        axis.line =  element_blank(),
        panel.grid.minor.y = element_blank(),
        panel.grid = element_line(linewidth = .1),
        axis.text = element_text(size = 12),
        axis.text.x = element_text(angle = 90, vjust = 0.5),
        axis.title = element_text(size = 12, face = "bold"),
        axis.title.x = element_text(vjust = -3),
        axis.title.y = element_text(vjust = 3)) 
distr2

5.2 Testing

suppressMessages(library(circular))
# Extract numeric vectors for M and F
em <- ht2 %>% filter(employed == "Yes") %>% pull(hair_time)
nem <- ht2 %>% filter(employed == "No") %>% pull(hair_time)

# Perform t-test 
print(t.test(em, nem))
## 
##  Welch Two Sample t-test
## 
## data:  em and nem
## t = -5.4909, df = 491.85, p-value = 6.42e-08
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.6928695 -0.3276857
## sample estimates:
## mean of x mean of y 
## -3.411087 -2.900809
# Convert hours to radians (since 24 hours = 2Ï€ radians)
em_rad <- em * (2 * pi / 24)
nem_rad <- nem * (2 * pi / 24)
 
# Ensure all values are within [0, 2Ï€]
em_rad <- (em_rad + 2 * pi) %% (2 * pi)
nem_rad <- (nem_rad + 2 * pi) %% (2 * pi)
 
# Convert to circular data
em_circ <- circular(em_rad, units = "radians")
nem_circ <- circular(nem_rad, units = "radians")
 
# Perform Watson-Williams test
watson.williams.test(list(em_circ, nem_circ))
## Warning in watson.williams.test.default(x, group): Concentration parameters
## (6.80, 5.13) not equal between groups. The test might not be applicable
## 
##  Watson-Williams test for homogeneity of means
## 
## data:  1 and 2
## F = 35.543, df1 = 1, df2 = 3591, p-value = 2.736e-09
## sample estimates:
## Circular Data: 
## Type = angles 
## Units = radians 
## Template = none 
## Modulo = asis 
## Zero = 0 
## Rotation = counter 
##  mean of 1  mean of 2 
## -0.8983614 -0.7704109
# print((-0.8551704 * 12 / 3.14159265359) + 24)
# print((-0.9232520 * 12 / 3.14159265359) + 24)
#  
# print((((-0.8551704 * 12 / 3.14159265359) + 24)-((-0.9232520 * 12 / 3.14159265359) + 24)) * 60)

5.3 Model

# Build models
m1 <- lm(hair_time ~ sex + bs(age) + employed, data = ht2)
jtools::summ(m1, digits = 3, confint = T)
Observations 3593
Dependent variable hair_time
Type OLS linear regression
F(5,3587) 53.122
R² 0.069
Adj. R² 0.068
Est. 2.5% 97.5% t val. p
(Intercept) -2.369 -2.603 -2.134 -19.828 0.000
sexF -0.113 -0.217 -0.009 -2.134 0.033
bs(age)1 -1.429 -2.068 -0.791 -4.388 0.000
bs(age)2 -1.437 -1.877 -0.997 -6.399 0.000
bs(age)3 -1.362 -1.818 -0.907 -5.865 0.000
employedNo 0.543 0.370 0.715 6.175 0.000
Standard errors: OLS

5.3.1 Conversion of Est. to minutes

convert_to_minutes <- function(coef_value) {
  minutes <- coef_value * 60  # Convert hours to minutes
  seconds <- (minutes - floor(minutes)) * 60  # Extract remaining seconds
  return(sprintf("%d min %d sec", floor(minutes), round(seconds)))
}

coefficients <- c(
  employedNo = 0.564
)
# Apply the conversion function
converted_times <- sapply(coefficients, convert_to_minutes)
# Print results
print(converted_times)
##      employedNo 
## "33 min 50 sec"

5.4 Checking assumptions

suppressMessages(library(performance))
check_model(m1)

5.5 Plot

# Compute emmeans for pairwise comparisons
emmeans_results <- emmeans(m1, ~ employed)
pairwise_comparison <- contrast(emmeans_results, method = "pairwise")

# Extract p-value
p_value_work <- summary(pairwise_comparison)$p.value

# Compute t-test for hair_time between employment groups
t_test_result <- t.test(ht2$hair_time ~ ht2$employed, var.equal = TRUE)
p_value_violin <- t_test_result$p.value

# Get effect estimates
eff_empl <- ggeffects::predict_response(m1, margin = "marginalmeans", terms = "employed")

# Create mod_work plot
mod_work <- ggplot() +
  geom_point(data = eff_empl, aes(x = x,  y = predicted, color = x), size = 3) +
  geom_errorbar(data = eff_empl, aes(x = x, ymin = conf.low, ymax = conf.high, color = x),
                linewidth = .4, width = .05) + 
  theme_bw() +
  xlab("Working") +
  scale_y_continuous("Predicted DLMO (hours)",
                     breaks = c(-3.75, -3.5, -3.25, -3, -2.75, -2.5),
                     labels = c("20:00", "20:30", "20:45", "21:00", "21:15", "21:30"),
                     limits = c(-3.8, -2.7)) +
  theme(plot.background = element_rect(fill = "transparent", colour = "transparent"),
        plot.margin = margin(.8, .8, .8, .6, "cm"),
        panel.border = element_rect(color = "black", fill = NA, linewidth = 1),
        legend.position = "none",
        axis.line =  element_line(linewidth = .4),
        panel.grid.minor.y = element_blank(),
        panel.grid = element_line(linewidth = .1),
        axis.text = element_text(size = 10),
        axis.title = element_text(size = 12, face = "bold"),
        axis.title.x = element_text(vjust = -2),
        axis.title.y = element_text(vjust = 3)) +
  scale_color_manual(values = c("#6D93B0", "#B08A6D"), name = "Working") +
  scale_fill_manual(values = c("#6D93B0", "#B08A6D"), name = "Working") +
  stat_pvalue_manual(
    data = data.frame(x = "Yes", y = 4.5, group1 = "No", group2 = "Yes",
                      label = sprintf("p = %.3f", p_value_work)),
    y.position = -2.75,
    size = 6
  )
mod_work

# Create violin plot
violin <- ggplot(ht2, aes(x = employed, y = hair_time)) + 
  geom_violin(aes(color = employed, fill = employed)) +
  geom_boxplot(color = "black", alpha = .5, width = .5) +
  theme(legend.position = "none") +
  scale_color_manual(values = c("#6D93B0", "#B08A6D")) +
  scale_fill_manual(values = c("#6D93B0", "#B08A6D")) +
  xlab("Working") +
  scale_y_continuous("Predicted DLMO (hours)",
                     breaks = c(-10, -7.5, -5, -2.5, 0, 2.5),
                     labels = c("14:00", "16:30", "19:00", "21:30", "00:00", "02:30"))  +
  theme_bw() +
  theme(plot.background = element_rect(fill = "transparent", colour = "transparent"),
        plot.margin = margin(.8, .8, .8, .6, "cm"),
        panel.border = element_rect(color = "black", fill = NA, linewidth = 1),
        legend.position = "none",
        axis.line =  element_line(linewidth = .4),
        panel.grid.minor.y = element_blank(),
        panel.grid = element_line(linewidth = .1),
        axis.text = element_text(size = 10),
        axis.title = element_text(size = 12, face = "bold"),
        axis.title.x = element_text(vjust = -2),
        axis.title.y = element_text(vjust = 3)) +
  stat_pvalue_manual(
    data = data.frame(group1 = "No", group2 = "Yes",
                      label = sprintf("p = %.3f", p_value_violin)),
    y.position = 5,
    size = 4
  )
violin

6 Figure 5B

6.1 Distribution

rm(list = ls(all = TRUE)) 

library(splines)

load("06_FinalDataset/ht4.Rda")

dup <- read.csv("06_FinalDataset/db_duplicate.csv") # 1450
colnames(dup) <- c("index", "duplicates")

# filter ht2 if ht2$id_s is in the dup$duplicates
ht4 <- ht4[!(ht4$id_s %in% dup$duplicates), ] # 1332
1450 - 1332 # 118
## [1] 118
ht4$hair_time <- ifelse(ht4$hair_time > 13, ht4$hair_time -24, ht4$hair_time)





# plot ----------------------------------------------------

mean = mean(ht4$hair_time)
median = median(ht4$hair_time)
print(paste0("Mean of hairtime: ", mean(ht4$hair_time)))
## [1] "Mean of hairtime: -3.446996996997"
print(paste0("Median of hairtime: ", median(ht4$hair_time)))
## [1] "Median of hairtime: -3.56666666666667"
distr4 <- 
  ggplot(ht4) +
  geom_histogram(data = ht4, aes(x = hair_time),
                 fill = "grey", color = "black", 
                 breaks = seq(-12,12,.5),
                 alpha = .5, linewidth = .1) +
  theme_bw() +
  scale_y_continuous(name = "Distribution (counts)", 
                     breaks = seq(0, 500, 100),
                     limits = c(0, 300)) +
  scale_x_continuous(limits = c(-12,12),
                     breaks = seq(-12,12,2),
                     labels = c("12:00","14:00", "16:00",
                                "18:00","20:00", "22:00",
                                "00:00",
                                "02:00", "04:00", "06:00",
                                "08:00", "10:00", "12:00"),
                     name = "Predicted DLMO (hours)") +
  geom_vline(xintercept = mean, color = "red", linetype = "dashed") +
  geom_vline(xintercept = median, color = "blue", linetype = "dashed") +
  # add annotation for vlines
  annotate("text", x = mean+1, y = 800, label = "Mean", color = "red", size = 3) +
  annotate("text", x = median-1, y = 800, label = "Median", color = "blue", size = 3) +
  theme_bw()+
  theme(plot.background = element_rect(fill = "transparent", colour = "transparent"),
        legend.position = "none",
        plot.margin = margin(.8, .8, .8, .6, "cm"),
        panel.border = element_rect(color = "black", fill = NA, linewidth = 1),  # Add this line
        axis.line =  element_line(),
        panel.grid.minor.y = element_blank(),
        panel.grid = element_line(linewidth = .1),
        axis.text = element_text(size = 12),
        axis.text.x = element_text(angle = 90, vjust = 0.5),
        axis.title = element_text(size = 12, face = "bold"),
        axis.title.x = element_text(vjust = -3),
        axis.title.y = element_text(vjust = 3)) 
distr4

6.2 Testing

# ht4 <- ht4 %>% filter(workdays == 5) then sign
t.test(ht4$hair_time~ht4$we_wd, var.equal = T) 
## 
##  Two Sample t-test
## 
## data:  ht4$hair_time by ht4$we_wd
## t = -1.9316, df = 1330, p-value = 0.05363
## alternative hypothesis: true difference in means between group wd and group we is not equal to 0
## 95 percent confidence interval:
##  -0.315385535  0.002446157
## sample estimates:
## mean in group wd mean in group we 
##        -3.517949        -3.361479
t.test(ht4$hair_time~ht4$we_wd, var.equal = F)
## 
##  Welch Two Sample t-test
## 
## data:  ht4$hair_time by ht4$we_wd
## t = -1.9376, df = 1299.4, p-value = 0.05289
## alternative hypothesis: true difference in means between group wd and group we is not equal to 0
## 95 percent confidence interval:
##  -0.314891793  0.001952414
## sample estimates:
## mean in group wd mean in group we 
##        -3.517949        -3.361479
wilcox.test(ht4$hair_time~ht4$we_wd) # sign
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  ht4$hair_time by ht4$we_wd
## W = 206578, p-value = 0.05746
## alternative hypothesis: true location shift is not equal to 0
suppressMessages(library(circular))
ht4$we_wd <- factor(ht4$we_wd, levels = c("wd", "we"), labels = c("Wed-Fri", "Sun"))

# Extract numeric vectors for M and F
wd<- ht4 %>% filter(we_wd == "Wed-Fri") %>% pull(hair_time)
we <- ht4 %>% filter(we_wd == "Sun") %>% pull(hair_time)

# Perform t-test 
print(t.test(wd, we))
## 
##  Welch Two Sample t-test
## 
## data:  wd and we
## t = -1.9376, df = 1299.4, p-value = 0.05289
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.314891793  0.001952414
## sample estimates:
## mean of x mean of y 
## -3.517949 -3.361479
# Convert hours to radians (since 24 hours = 2Ï€ radians)
wd_rad <- wd * (2 * pi / 24)
we_rad <- we * (2 * pi / 24)
 
# Ensure all values are within [0, 2Ï€]
wd_rad <- (wd_rad + 2 * pi) %% (2 * pi)
we_rad <- (we_rad + 2 * pi) %% (2 * pi)
 
# Convert to circular data
wd_circ <- circular(wd_rad, units = "radians")
we_circ <- circular(we_rad, units = "radians")
 
# Perform Watson-Williams test
watson.williams.test(list(wd_circ, we_circ))
## 
##  Watson-Williams test for homogeneity of means
## 
## data:  1 and 2
## F = 3.7795, df1 = 1, df2 = 1330, p-value = 0.05209
## sample estimates:
## Circular Data: 
## Type = angles 
## Units = radians 
## Template = none 
## Modulo = asis 
## Zero = 0 
## Rotation = counter 
##  mean of 1  mean of 2 
## -0.9258741 -0.8852866
# print((-0.8551704 * 12 / 3.14159265359) + 24)
# print((-0.9232520 * 12 / 3.14159265359) + 24)
#  
# print((((-0.8551704 * 12 / 3.14159265359) + 24)-((-0.9232520 * 12 / 3.14159265359) + 24)) * 60)

6.3 Model

m1 <- lm(hair_time ~  sex + bs(age) + we_wd, data = ht4) #%>% filter(employed == "Yes"))
jtools::summ(m1, digits = 3, confint = T)
Observations 1332
Dependent variable hair_time
Type OLS linear regression
F(5,1326) 11.826
R² 0.043
Adj. R² 0.039
Est. 2.5% 97.5% t val. p
(Intercept) -2.914 -3.333 -2.494 -13.639 0.000
sexF -0.057 -0.218 0.103 -0.702 0.483
bs(age)1 -0.304 -1.463 0.856 -0.514 0.607
bs(age)2 -1.898 -2.737 -1.058 -4.436 0.000
bs(age)3 -0.169 -1.194 0.857 -0.322 0.747
we_wdSun 0.197 0.040 0.353 2.468 0.014
Standard errors: OLS
# get effects ----------------------------------------------------

eff_wdwe <- ggeffects::predict_response(m1, margin = "marginalmeans", terms = "we_wd")
# -------------------------------------------------------------- # 

6.3.1 Conversion of Est. to minutes

convert_to_minutes <- function(coef_value) {
  minutes <- coef_value * 60  # Convert hours to minutes
  seconds <- (minutes - floor(minutes)) * 60  # Extract remaining seconds
  return(sprintf("%d min %d sec", floor(minutes), round(seconds)))
}

coefficients <- c(
  we_wdSun = 0.208
)
# Apply the conversion function
converted_times <- sapply(coefficients, convert_to_minutes)
# Print results
print(converted_times)
##        we_wdSun 
## "12 min 29 sec"

6.4 Checking assumptions

suppressMessages(library(performance))
check_model(m1)

6.5 Plot

library(emmeans)
# Compute emmeans for pairwise comparison
emmeans_results <- emmeans(m1, ~ we_wd)
pairwise_comparison <- contrast(emmeans_results, method = "pairwise")

# Extract p-value
p_value_weekday <- summary(pairwise_comparison)$p.value

# Compute t-test for hair_time between weekday groups
t_test_result <- t.test(ht4$hair_time ~ ht4$we_wd, var.equal = TRUE)
p_value_violin <- t_test_result$p.value

# Get effect estimates
eff_wdwe <- ggeffects::predict_response(m1, margin = "marginalmeans", terms = "we_wd")

# Create mod_sampling_date plot
mod_sampling_date <- ggplot() +
  geom_point(data = eff_wdwe, aes(x = x, y = predicted, color = x), size = 3) +
  geom_errorbar(data = eff_wdwe, aes(x = x, ymin = conf.low, ymax = conf.high, color = x),
                linewidth = .4, width = .05) + 
  theme_bw() +
  xlab("Sampling date") +
  scale_y_continuous("Predicted DLMO (hours)",
                     breaks = c(-4.0, -3.75, -3.5, -3.25, -3.0),
                     labels = c("20:00", "20:15", "20:30", "20:45", "21:00"),
                     limits = c(-4.0, -2.95)) +
  theme(plot.background = element_rect(fill = "transparent", colour = "transparent"),
        plot.margin = margin(.8, .8, .8, .6, "cm"),
        panel.border = element_rect(color = "black", fill = NA, linewidth = 1),
        legend.position = "none",
        axis.line =  element_line(),
        panel.grid.minor.y = element_blank(),
        panel.grid = element_line(linewidth = .1),
        axis.text = element_text(size = 10),
        axis.title = element_text(size = 12, face = "bold"),
        axis.title.x = element_text(vjust = -2),
        axis.title.y = element_text(vjust = 3)) +
  scale_color_manual(values = c("#6D93B0", "#B08A6D"), name = "Sampling date") +
  scale_fill_manual(values = c("#6D93B0", "#B08A6D"), name = "") +
  stat_pvalue_manual(
    data = data.frame(x = "Wed-Fri", y = 4, group1 = "Sun", group2 = "Wed-Fri",
                      label = sprintf("p = %.3f", p_value_weekday)),
    y.position = -3.25,
    size = 6
  )


mod_sampling_date

# Create violin plot
violin <- ggplot(ht4, aes(x = we_wd, y = hair_time)) + 
  geom_violin(aes(color = we_wd, fill = we_wd)) +
  geom_boxplot(color = "black", alpha = .5, width = .5) +
  theme(legend.position = "none") +
  scale_color_manual(values = c("#6D93B0", "#B08A6D")) +
  scale_fill_manual(values = c("#6D93B0", "#B08A6D")) +
  xlab("Sampling weekday") +
  scale_y_continuous("Predicted DLMO (hours)",
                     breaks = c(-10, -7.5, -5, -2.5, 0, 2.5, 5.0),
                     labels = c("14:00", "16:30", "19:00", "21:30", "00:00", "02:30", "03:00"),
                     limits = c(-10, 5.1))  +
  theme_bw() +
  theme(plot.background = element_rect(fill = "transparent", colour = "transparent"),
        plot.margin = margin(.8, .8, .8, .6, "cm"),
        panel.border = element_rect(color = "black", fill = NA, linewidth = 1),
        legend.position = "none",
        axis.line =  element_line(),
        panel.grid.minor.y = element_blank(),
        panel.grid = element_line(linewidth = .1),
        axis.text = element_text(size = 10),
        axis.title = element_text(size = 12, face = "bold"),
        axis.title.x = element_text(vjust = -2),
        axis.title.y = element_text(vjust = 3)) +
  stat_pvalue_manual(
    data = data.frame(group1 = "Wed-Fri", group2 = "Sun",
                      label = sprintf("p = %.3f", p_value_violin)),
    y.position = 5,
    size = 4
  )
violin

7 Figure 5C

7.1 Distribution

# Load necessary libraries
library(ggplot2)
library(tidyverse)
library(dplyr)
library(splines)
library(ggpubr)
library(ggeffects)
library(plotrix)
# Clear environment
rm(list = ls(all = TRUE))

# Load dataset and filter
load("06_FinalDataset/ht2.Rda")

# removing duplicates
dup <- read.csv("06_FinalDataset/db_duplicate.csv") # 3949
colnames(dup) <- c("index", "duplicates")

# filter ht2 if ht2$id_s is in the dup$duplicates
ht2 <- ht2[!(ht2$id_s %in% dup$duplicates), ] # 3593
3949 - 3593
## [1] 356
# replace person with ht2$age = 51.5 to 52
ht2$age[ht2$age == 51.5] <- 52

# Adjust hair_time (for values > 13)
ht2$hair_time <- ifelse(ht2$hair_time > 13, ht2$hair_time - 24, ht2$hair_time)

#---------------------- Distribution Plot ----------------------#
mean_val <- mean(ht2$hair_time)
median_val <- median(ht2$hair_time)
print(paste0("Mean of hairtime: ", mean(ht2$hair_time)))
## [1] "Mean of hairtime: -3.35257445032007"
print(paste0("Median of hairtime: ", median(ht2$hair_time)))
## [1] "Median of hairtime: -3.5"
distr2 <- ggplot(ht2) +
  geom_histogram(aes(x = hair_time),
                 fill = "grey", color = "black", 
                 breaks = seq(-12, 12, 0.5),
                 alpha = 0.5, linewidth = 0.1) +
  theme_bw() +
  scale_y_continuous(name = "Distribution (counts)", breaks = seq(0, 500, 100)) +
  scale_x_continuous(limits = c(-12, 12),
                     breaks = seq(-12, 12, 2),
                     labels = c("12:00","14:00", "16:00",
                                "18:00","20:00", "22:00",
                                "00:00","02:00", "04:00", "06:00",
                                "08:00", "10:00", "12:00"),
                     name = "Predicted DLMO (hours)") +
  geom_vline(xintercept = mean_val, color = "red", linetype = "dashed") +
  geom_vline(xintercept = median_val, color = "blue", linetype = "dashed") +
  annotate("text", x = mean_val + 1, y = 800, label = "Mean", color = "red", size = 3) +
  annotate("text", x = median_val - 1, y = 800, label = "Median", color = "blue", size = 3) +
  theme(plot.background = element_rect(fill = "transparent", colour = "transparent"),
        legend.position = "none",
        plot.margin = margin(0.8, 0.8, 0.8, 0.6, "cm"),
        panel.border = element_rect(color = "black", fill = NA, linewidth = 1),
        axis.line = element_blank(),
        panel.grid.minor.y = element_blank(),
        panel.grid = element_line(linewidth = 0.1),
        axis.text = element_text(size = 12),
        axis.text.x = element_text(angle = 90, vjust = 0.5),
        axis.title = element_text(size = 12, face = "bold"),
        axis.title.x = element_text(vjust = -3),
        axis.title.y = element_text(vjust = 3))
distr2

7.2 Testing

suppressMessages(library(circular))
suppressMessages(library(CircStats))
suppressMessages(library(dplyr))
suppressMessages(library(Directional))

# Pearson correlation test
cor.test(ht2$workdays, ht2$hair_time, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  ht2$workdays and ht2$hair_time
## t = -6.7984, df = 3591, p-value = 1.234e-11
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.14489102 -0.08032144
## sample estimates:
##        cor 
## -0.1127252
# Use this when you suspect the relationship might be monotonic but not strictly linear or when the data are not normally distributed.
cor.test(ht2$workdays, ht2$hair_time, method = "spearman")
## Warning in cor.test.default(ht2$workdays, ht2$hair_time, method = "spearman"):
## Cannot compute exact p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  ht2$workdays and ht2$hair_time
## S = 8325297350, p-value = 3.925e-06
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##         rho 
## -0.07690994
# his test is especially useful for small sample sizes or when dealing with tied ranks. 
cor.test(ht2$workdays, ht2$hair_time, method = "kendall")
## 
##  Kendall's rank correlation tau
## 
## data:  ht2$workdays and ht2$hair_time
## z = -4.6022, p-value = 4.18e-06
## alternative hypothesis: true tau is not equal to 0
## sample estimates:
##         tau 
## -0.05915278
# Linear regression
model <- lm(hair_time ~ workdays, data = ht2)
summary(model)
## 
## Call:
## lm(formula = hair_time ~ workdays, data = ht2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.0500 -1.0415 -0.1266  0.8901  7.9802 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -2.89859    0.07174 -40.405  < 2e-16 ***
## workdays    -0.10497    0.01544  -6.798 1.23e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.571 on 3591 degrees of freedom
## Multiple R-squared:  0.01271,    Adjusted R-squared:  0.01243 
## F-statistic: 46.22 on 1 and 3591 DF,  p-value: 1.234e-11
all <- ht2  %>% pull(hair_time)

# Convert hours to radians (since 24 hours = 2Ï€ radians)
all_rad <- all * (2 * pi / 24)

# Ensure all values are within [0, 2Ï€]
all_rad <- (all_rad + 2 * pi) %% (2 * pi)

# Convert to circular data
all_circ <- circular(all_rad, units = "radians")

# 1. Linear circular correlation
circlin.cor(theta = all_circ, x = ht2$workdays, rads = TRUE)
##       R-squared      p-value
## [1,] 0.01471059 1.155618e-23

7.3 Model

# Fit model (using workdays as predictor)
m1 <- lm(hair_time ~ sex + bs(age) + bs(workdays), data = ht2)
m2 <- lm(hair_time ~ sex + bs(age) + workdays, data = ht2)
jtools::summ(m2, digits = 3, confint = T)
Observations 3593
Dependent variable hair_time
Type OLS linear regression
F(5,3587) 55.449
R² 0.072
Adj. R² 0.070
Est. 2.5% 97.5% t val. p
(Intercept) -1.810 -2.073 -1.547 -13.497 0.000
sexF -0.131 -0.235 -0.027 -2.470 0.014
bs(age)1 -1.409 -2.046 -0.771 -4.331 0.000
bs(age)2 -1.392 -1.832 -0.953 -6.211 0.000
bs(age)3 -1.391 -1.845 -0.937 -6.007 0.000
workdays -0.116 -0.148 -0.083 -7.006 0.000
Standard errors: OLS

7.3.1 Conversion of Est. to minutes

convert_to_minutes <- function(coef_value) {
  minutes <- coef_value * 60  # Convert hours to minutes
  seconds <- (minutes - floor(minutes)) * 60  # Extract remaining seconds
  return(sprintf("%d min %d sec", floor(minutes), round(seconds)))
}

coefficients <- c(
  workdays = -0.121
)
# Apply the conversion function
converted_times <- sapply(coefficients, convert_to_minutes)
# Print results
print(converted_times)
##        workdays 
## "-8 min 44 sec"

7.4 Checking assumptions

suppressMessages(library(performance))
check_model(m2)

7.4.1 BIC: linear vs. spline model

print(paste0("BIC for spline model: ", BIC(m1)))
## [1] "BIC for spline model: 13281.1586994802"
print(paste0("BIC for linear model: ", BIC(m2)))
## [1] "BIC for linear model: 13277.3923889866"

7.5 Plot

# Predicted effects for workdays
eff_wd <- ggeffects::predict_response(m2, margin = "marginalmeans", terms = "workdays")
eff_wd <- eff_wd %>% rename(hair_time = predicted)

# Compute means by workdays (for raw hair_time)
means <- ht2 %>% group_by(workdays) %>% 
  summarise(mean_age = mean(age),
            se_hair_time = std.error(hair_time),
            mean_hair_time = mean(hair_time),
            n = n())
means$x <- as.numeric(as.character(means$workdays))

# For adjusted dots: correct hair_time using model predictions
wd_age <- ggeffects::predict_response(m2, margin = "marginalmeans", terms = "age[18:70]")
wd_all <- ggeffects::predict_response(m2, margin = "marginalmeans", terms = c("age[18:70]", "sex"))
wd_all <- as.data.frame(wd_all)
corr_wd <- wd_all %>% dplyr::select(x, group, predicted)
colnames(corr_wd) <- c("age", "sex", "predicted_hair_time")

db <- merge(ht2, corr_wd, by = c("sex", "age"), all.x = TRUE)
db$hair_time_corrected <- db$hair_time - db$predicted_hair_time +
  wd_age$predicted[wd_age$x == round(mean(db$age))]

means2 <- db %>% group_by(workdays) %>% 
  summarise(mean_age = mean(age),
            se_hair_time = std.error(hair_time_corrected),
            mean_hair_time = mean(hair_time_corrected),
            n = n())
means2$x <- as.numeric(as.character(means2$workdays))

# Build workdays figure
workdays_fig <- ggplot() +
  geom_point(data = means, aes(x = x, y = mean_hair_time),
             color = "#a3bbbf", alpha = 0.5) +
  geom_errorbar(data = means, aes(x = x, ymin = mean_hair_time - se_hair_time, ymax = mean_hair_time + se_hair_time),
                linewidth = 0.3, width = 0.1, color = "#a3bbbf", alpha = 0.5) +
  geom_point(data = means2, aes(x = x, y = mean_hair_time)) +
  geom_errorbar(data = means2, aes(x = x, ymin = mean_hair_time - se_hair_time, ymax = mean_hair_time + se_hair_time),
                linewidth = 0.3, width = 0.1) +
  geom_text(data = means2, aes(label = n, x = workdays + 0.25, y = mean_hair_time + 0.35),
            color = "#ff4d00") +
  geom_line(data = eff_wd, aes(x = x, y = hair_time)) +
  geom_ribbon(data = eff_wd, aes(x = x, ymin = conf.low, ymax = conf.high), alpha = 0.2) +
  theme_bw() +
  scale_x_continuous("Workdays", breaks = seq(0, 7, 1)) +
  scale_y_continuous("Predicted DLMO (hours)",
                     breaks = c(-4, -3.5, -3, -2.5, -2, -1.5, -1, -0.5, 0),
                     labels = c("20:00", "20:30", "21:00",
                                "21:30", "22:00", "22:30",
                                "23:00", "23:30", "00:00"),
                     limits = c(-4, -0.6)) +
  theme(plot.background = element_rect(fill = "transparent", colour = "transparent"),
        plot.margin = margin(0.8, 0.8, 0.8, 0.6, "cm"),
        panel.border = element_rect(color = "black", fill = NA, linewidth = 1),
        axis.line = element_line(),
        panel.grid.minor.y = element_blank(),
        panel.grid = element_line(linewidth = 0.1),
        axis.text = element_text(size = 10),
        axis.title = element_text(size = 12, face = "bold"),
        axis.title.x = element_text(vjust = -2),
        axis.title.y = element_text(vjust = 3),
        legend.position = "bottom")
workdays_fig